---
title: "Online Supporting Information"
author:
- Gabriel Lenz
- Alexander Sahn
bibliography: covariates.bib
date: "`r format(Sys.time(), '%B %d, %Y')`"
output:
  pdf_document:
    citation_package: natbib
    fig_caption: yes
    keep_tex: yes
    toc: yes
    toc_depth: 2
  html_document:
    df_print: paged
    toc: yes
    toc_depth: '2'
fontsize: 12pt
geometry: margin=1in
fontfamily: mathpazo
spacing: double
subtitle: Achieving Statistical Significance with Control Variables and without Transparency
thanks: ''
biblio-style: apsr
---


\newpage
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, fig.lp="")
```

```{r load, message=FALSE, warning=FALSE, include=FALSE}
#Load Libraries
library(tidyverse)
library(magrittr)
library(knitr)
library(gridExtra)
library(ggridges)
library(readxl)
library(here)

wd <- here()
setwd(wd)

load("reanalysis.Rdata")

STATA <- t(read.csv("replication_commands.csv"))
colnames(STATA) = STATA[1, ] # the first row will be the header
STATA = as.data.frame(STATA[-1, ] )         # removing the first row.
STATA <- subset(STATA, select=-c(V18:V23))


bv_mv <- bv_mv %>% mutate(mv_bp = ifelse(mv_b<0, mv_b*-1,mv_b), #recode so that multivariate is always positive and 
                          bv_bp = ifelse(mv_b<0, bv_b*-1,bv_b), # if Multivariate is negative, Flip sign Of bivariate
                          mv_bp = ifelse(bv_bp<0, mv_bp+bv_bp,mv_bp), #If bivariate and multivariate change signs, which only happens if Bivariate is less than zero
                          bv_bp = ifelse(bv_bp<0, bv_bp+bv_bp,bv_bp),
                          inflate = (log(mv_bp+1) - log(bv_bp+1))*100,
                          inflate2 = (log(mv_bp+2) - log(bv_bp+2))*100,
                          inflate5 = (log(mv_bp+5) - log(bv_bp+5))*100,
                          inflate10 = (log(mv_bp+10) - log(bv_bp+10))*100,
                          linflate = ifelse(inflate>0,log(inflate),log(inflate*-1)*-1),  
                          linflate = ifelse(inflate==0,0,linflate), 
                          inflate_p =  ifelse(sign(bv_b)!=sign(mv_b), (mv_p-1) - (1-bv_p), mv_p- bv_p ),
                          inflate_lp = (log(mv_p+1)-log(bv_p+1))*100,
                          inflate_se = (log(mv_se+1) - log(bv_se+1)) * 100,
                          n_mv = mv_n/bv_n * 100,
                          pinflate = abs((mv_b - bv_b) / bv_b) * ifelse(abs(mv_b)>abs(bv_b), 1, -1) * 100,
                          pinflate_se = (mv_se - bv_se) / bv_se * 100)


#Merge in replication info spreadsheet
info <- read_excel("replication_info.xlsx", sheet = "Full")
merged <- inner_join(bv_mv, 
                     subset(dplyr::rename(info, author = first_author), status!="Excluded"), by="author")
# inflation summary stats
inflation.stats <- function(name, data){
  n <- nrow(data)
  mean <- mean(data$inflate)
  mean_r2 <- sum(data$inflate * ifelse(is.na(data$mv_r2), data$mv_r2p, data$mv_r2), na.rm=T) / sum(!is.na(ifelse(is.na(data$mv_r2), data$mv_r2p, data$mv_r2)))
  med <- median(data$inflate)
  g0 <- mean(data$inflate>0)
  g10 <- mean(data$inflate>10)
  mean_se <- mean(data$inflate_se)
  med_se <- median(data$inflate_se)
  g0_se <- mean(data$inflate_se>0)
  out <- c(name, n, mean, mean_r2, med, g0, g10, mean_se, med_se, g0_se)
  names(out) <- c('name', 'n', 'mean', 'r2 weighted mean', 'median', 'g0', 'g10', 'se_mean', 'se_median', 'se_g0')
  return(out)
}

inf <- rbind(inflation.stats('all', bv_mv), inflation.stats('experiments', subset(merged, type %in% 1:3)), 
                         inflation.stats('observational', subset(merged, type %in% c(0,4))),
                         inflation.stats('bivariate specification presented', subset(merged, no_covar_presented>0)),
                         inflation.stats('bivariate specification presented and ns', subset(merged, no_covar_presented>0 & bv_p>0.05)),
                         inflation.stats('bivariate p > 0.05', subset(bv_mv, bv_p>=.05)),
                         inflation.stats('bivariate p > 0.05 - observational', subset(merged, (type==0|type==4) & bv_p>=.05)),
                         inflation.stats('full specification .045 < p < .055', subset(bv_mv, mv_p>=.045 & mv_p<=.055)),
                         inflation.stats('different n in full specification', subset(bv_mv, bv_n!=mv_n)),
                         inflation.stats('experiments with no bivariate spec', subset(merged, type %in% 1:3 & no_covar_presented==0)),
                         inflation.stats('experiments with bivariate spec', subset(merged, type %in% 1:3 & no_covar_presented>0)),
                         inflation.stats('observational with no bivariate spec', subset(merged, (type==0|type==4) & no_covar_presented==0)),
                         inflation.stats('observational with bivariate spec', subset(merged, (type==0|type==4) & no_covar_presented>0)),
                         inflation.stats('observational with no bivariate spec and p > 0.05', subset(merged, bv_p>0.05 & (type==0|type==4) & no_covar_presented==0)),
                         inflation.stats('observational with no bivariate spec and p > 0.05 + wrong sign', subset(merged, (bv_p>0.05 | sign(bv_b)!=sign(mv_b)) & (type==0|type==4) & no_covar_presented==0)))

#Percent not showing inflation from insignificance to significance
hdp <-round(as.numeric(inf[which(inf=='observational with no bivariate spec and p > 0.05 + wrong sign'),2])/as.numeric(inf[which(inf=='observational'),2])*100, digits = 0)
```


# Quotes on Suppression Effects

"Ideally, including suppressor variables in a model should be theory based and every regression model should include using a test for suppressor effects. This approach allows researchers to become ware of the suppressor effect of a particular variable and to be better able to explain when regression results change drastically from one model to another."  [@pandey_suppressor_2010]

"Suppression and multicollinearity present challenging conditions for those who wish to apply multiple regression analysis." [@beckstead_isolating_2012]

"The authors discuss definitions of the suppressor phenomenon, show how the unwary researcher can be warned against it, and present guidelines for the interpretation of the results."  [@maassen_suppressor_2001]

"In his influential book, Personality and Prediction, Jerry Wiggins (1973) concluded: "the case for suppressor variables remains to be demonstrated" (p. 38). The impact of the book on a generation of personality researchers led them, quite rightly, to be skeptical about the value of suppressor variables. Wiggins's conclusion was based partly on Ghiselli's (1972) failure to replicate a suppressor effect, leading him to liken suppressor variables to the ephemeral 'will-o-the wisp' (p. 270). Wiggins was particularly persuaded by the disappointing results reported by Goldberg, Rorer, and Green (1970). Since those earlier warnings, summary evaluations of the utility of suppressor variables in behavioral science have remained guarded at best (e.g., Cohen & Cohen, 1992; Pedhazur, 1982). Even very recently, Maassen and Baker (2001) warned against devoting energy to formulating theoretical explanations for solitary suppression results.''  [@paulhus_two_2004]

"The most difficult interpretive problem, however, occurs in the face of suppression when relations between an independent variable and a dependent variable increase or change direction following partialling (e.g., Cohen & Cohen, 1983; Darlington, 1968; Horst, 1941). This situation poses the greatest interpretive hazard because a relation that did not exist or did not exist as strongly before partialling is now uncovered'' [@lynam_perils_2006].

"Suppression effects in multiple linear regression are one of the most elusive phenomena in the educational and psychological measurement literature.'' [@kim_causal_2019]

We note that some of these authors are using suppression effects to describe a very particular case where the suppressor variable is uncorrelated with the dependent variable.

## Quotes on Controls Introducing Bias

"Omitting both the fixed effects and the unobserved confounder was preferable to adjusting for the fixed effects precisely because the two biases counterbalanced one another in the unadjusted estimate. In practice, a researcher is unlikely to know whether adjusting for covariates will unmask unobserved confounder bias. Similar observations have led to somewhat pessimistic assessment of observational analysis, for example, in Clarke (2005) and Frisell et al. (2012) (but see also Clarke [2009])." [@middleton_bias_2016]

"Using a simple linear (regression) setting with two confounders  one observed (X), the other unobserved (U) we demonstrate that conditioning on the observed confounder X does not necessarily imply that the confounding bias decreases, even if X is highly correlated with U. That is, adjusting for X may increase instead of reduce the omitted variable bias (OVB). Two phenomena can cause an increasing OVB: (i) bias amplification and (ii) cancellation of offsetting biases." [@steiner_mechanics_2016]

"A key underlying assumption is that the danger posed by omitted variable bias can be ameliorated by the inclusion of relevant control variables. Unfortunately, as this article demonstrates, there is nothing in the mathematics of regression analysis that supports this conclusion. The inclusion of additional control variables may increase or decrease the bias, and we cannot know for sure which is the case in any particular situation." [@clarke_phantom_2005]

"Scholars often assume that the danger posed by omitted variable bias can be ameliorated by the inclusion of large numbers of relevant control variables. However, there is nothing in the mathematics of regression analysis that supports this conclusion. This paper goes beyond textbook treatments of omitted variable bias and shows, both for OLS and for generalized linear models, that the inclusion of additional control variables may increase or decrease the bias, and we cannot know for sure which is the case in any particular situation. The last section of the paper shows how formal sensitivity analysis can be used to determine whether omitted variables are a problem. A substantive example demonstrates the method."  [@clarke_return_2009]



\newpage 
# Method
According to our pre-analysis plan (see anonymous link: https://goo.gl/tCeNFM), we selected articles and models from within these articles according to the following rules. We mark aspects of our plan that evolved as we collected the data in italics.

## Article Selection Rules
We included _AJPS_ articles from volumes 57-59, because the strict replication data and code posting was fully in effect for publish articles by volume 57. 

We selected articles according to the following rules:

* The article must rely on a statistical model (excludes experiments without controls, formal models, political theory, simulations, etc.).
* The article's analysis uses *a standard statistical model* and replication code and data is available.
* The article's analysis uses three or more controls.
    + *To increase the number of studies, we lowered the threshold to just one control.*
* The article's publication rest primary on a single, non-null result (excludes findings driven by multiple results or null results).

Table S1 presents a count of the articles excluded by the reason for exclusion.

## Model Selection Rules 
Given that papers often have multiple hypotheses and multiple tests of each hypothesis, we selected one statistical model from each paper according to the following criteria:

* We selected a model that is the main finding of the paper-referenced in the abstract or in body.
* If there are multiple specifications of the same model with different sets of controls, we chose the model with the full set of controls.
* If there are multiple cases or measures of the key independent variable and the text does not point to one specifically, *we used the summary measure, if the paper presents one, and if not,* we used the model with the smallest absolute value coefficient on the key variable.
* We included *any variables necessary for the research design or identification strategy*.


\newpage 
# Table S1: Reasons for Exclusion

```{r echo = FALSE, results = 'asis'}

t <- table(info$exclude_reason)
t <- as.matrix(addmargins(t) )
row.names(t)[10]  <-"Total articles excluded"
colnames(t) <- ""
kable(t)
```

"Nonstandard statistical model" are those without a function in Stata or R.

"Plethora of claims" describes articles that did not depend on one key statistically significant result, but on several results.


We exclude all AJPS "workshop" articles.

\newpage


# Figure S1: P-value Changes in All Studies

In the plot below, we show the average change in p-values from the bivariate to the full specification for *all* articles, not just those that do not disclose the bivariate. The figure first shows p-value changes all studies, observational and experimental, with 95% confidence intervals. It next shows the key finding in the paper, that observational studies showing a bivariate specification have only a trivial amount of p-value decreases, but that observational studies that do not show a bivariate specification have a larger amount. Next, it shows that the p-value decreases occur in studies with fixed effects and without fixed effects. It then shows the p-value changes for all three _AJPS_ volumes we analyze and all three subfields. Finally, it shows estimates with below the median number of controls and above the median number of controls. The mean number of controls in these studies is nine and the median is eight. In the paper, we show similar statistics only for studies that did not to show a bivariate specification.

&nbsp;&nbsp;

```{r, echo=F, message=FALSE, warning=FALSE}
#amount of p-value change plot (by fixed effects, no fixed effects, volume, etc.)
merged_STATA <- inner_join(merged, STATA, by="author")
merged_STATA$fixed_effects.x[is.na(merged_STATA$fixed_effects.x)] <- 0
merged_STATA$flip <- ifelse(sign(merged_STATA$bv_b)!=sign(merged_STATA$mv_b), 1, 0)

merged_STATA$inflate_p <- ifelse(merged_STATA$flip==1, (merged_STATA$mv_p -1) - (1-merged_STATA$bv_p), merged_STATA$mv_p- merged_STATA$bv_p )
merged_STATA$n_covariates <- stringr::str_count(merged_STATA$covariates, " ") + stringr::str_count(merged_STATA$fixed_effects.y, " ")
median_covar <- median(merged_STATA$n_covariates)

inflate.plot.ci <- function(name, dat){
  #tt <- t.test(dat$bv_p, dat$mv_p_flip, paired=TRUE)
  mean <- mean(dat$inflate_p)
  sd <- sd(dat$inflate_p)
  n <- nrow(dat)
  ci <- 1.9 * sd / sqrt(n)
  out <- c(paste0(name," (n=",n,")"), mean, sd, ci)
  names(out) <- c("name", "mean", "sd", "ci")
  return(out)
}
plot_data <- inflate.plot.ci("All", merged_STATA)
plot_data <- rbind(plot_data, inflate.plot.ci("Observational", subset(merged_STATA, type %in% c(0,4))))
plot_data <- rbind(plot_data, inflate.plot.ci("Experimental", subset(merged_STATA, type %in% c(1:3))))
plot_data <- rbind(plot_data, inflate.plot.ci("Obs., Bivariate Specif. Shown", subset(merged_STATA, type %in% c(0,4) & no_covar_presented > 0)))
plot_data <- rbind(plot_data, inflate.plot.ci("Obs., No Bivariate Specif. Shown", subset(merged_STATA, type %in% c(0,4) & no_covar_presented==0)))
plot_data <- rbind(plot_data, inflate.plot.ci("Obs., Fixed Effects", subset(merged_STATA, type %in% c(0,4) & fixed_effects.x>0)))
plot_data <- rbind(plot_data, inflate.plot.ci("Obs., No Fixed Effects", subset(merged_STATA, type %in% c(0,4) & fixed_effects.x==0)))
plot_data <- rbind(plot_data, inflate.plot.ci("Volume 57", subset(merged_STATA, vol==57)))
plot_data <- rbind(plot_data, inflate.plot.ci("Volume 58", subset(merged_STATA, vol==58)))
plot_data <- rbind(plot_data, inflate.plot.ci("Volume 59", subset(merged_STATA, vol==59)))
plot_data <- rbind(plot_data, inflate.plot.ci("American", subset(merged_STATA, subfield=="American")))
plot_data <- rbind(plot_data, inflate.plot.ci("Comparative", subset(merged_STATA, subfield=="Comparative")))
plot_data <- rbind(plot_data, inflate.plot.ci("IR", subset(merged_STATA, subfield=="IR")))
plot_data <- rbind(plot_data, inflate.plot.ci("Bivariate Specification is Bivariate", subset(merged_STATA, secondary_vars=="")))
plot_data <- rbind(plot_data, inflate.plot.ci("Bivariate Specification not Bivariate", subset(merged_STATA, secondary_vars!="")))
plot_data <- rbind(plot_data, inflate.plot.ci("Below Median Number of Controls", subset(merged_STATA, n_covariates < median_covar)))
plot_data <- rbind(plot_data, inflate.plot.ci("Above Median Number of Controls", subset(merged_STATA, n_covariates >= median_covar)))

plot_data <- as.data.frame(plot_data)
plot_data$mean <- as.numeric(as.character(plot_data$mean))
plot_data$sd <- as.numeric(as.character(plot_data$sd))
plot_data$ci <- as.numeric(as.character(plot_data$ci))
plot_data$x <- nrow(plot_data):1

ggplot(plot_data, aes(x=x, y=mean)) + geom_point(size=2) + geom_linerange(aes(ymin=mean - ci, ymax=mean + ci)) + 
  labs(y="Average p-value change from adding controls", x="", col=NULL) + 
  theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  geom_hline(yintercept=0, lty=2) + coord_flip(ylim=c(-.6, .1)) + 
  scale_x_continuous(breaks=1:nrow(plot_data), labels=plot_data$name[nrow(plot_data):1])
```

<!-- ```{r, echo=F, message=FALSE, warning=FALSE} -->
<!-- #amount of p-value change plot (by fixed effects, no fixed effects, volume, etc.) -->
<!-- merged_STATA <- inner_join(merged, STATA, by="author") -->
<!-- merged_STATA$fixed_effects.x[is.na(merged_STATA$fixed_effects.x)] <- 0 -->
<!-- merged_STATA$flip <- ifelse(sign(merged_STATA$bv_b)!=sign(merged_STATA$mv_sig_b), 1, 0) %>%
mutate(bv_t = bv_b/bv_se, mv_t = mv_b/mv_se, t_mvb_bvse = mv_b/bv_se, t_bvb_mvse = bv_b/mv_se,
         p_mvb_bvse = pnorm(-abs(mv_b/bv_se))*2,
         p_mvb_bvse2 = ifelse(sign(mv_p-bv_p)!=sign(p_mvb_bvse-bv_p), bv_p,
                              ifelse(bv_p<mv_p, ifelse(p_mvb_bvse>mv_p, mv_p, p_mvb_bvse), ifelse(p_mvb_bvse<mv_p, mv_p, p_mvb_bvse))),
         p_bvb_mvse = pnorm(-abs(bv_b/mv_se))*2)-->

<!-- merged_STATA$inflate_p <- ifelse(merged_STATA$flip==1, (merged_STATA$p_mvb_bvse -1) - (1-merged_STATA$bv_p), merged_STATA$p_mvb_bvse- merged_STATA$bv_p ) -->

<!-- inflate.plot.ci <- function(name, dat){ -->
<!--   #tt <- t.test(dat$bv_p, dat$mv_p_flip, paired=TRUE) -->
<!--   mean <- mean(dat$inflate_p) -->
<!--   sd <- sd(dat$inflate_p) -->
<!--   n <- nrow(dat) -->
<!--   ci <- 1.9 * sd / sqrt(n) -->
<!--   out <- c(paste0(name," (n=",n,")"), mean, sd, ci) -->
<!--   names(out) <- c("name", "mean", "sd", "ci") -->
<!--   return(out) -->
<!-- } -->

<!-- plot_data <- inflate.plot.ci("All", merged_STATA) -->
<!-- plot_data <- rbind(plot_data, inflate.plot.ci("Observational", subset(merged_STATA, type %in% c(0,4)))) -->
<!-- plot_data <- rbind(plot_data, inflate.plot.ci("Experimental", subset(merged_STATA, type %in% c(1:3)))) -->
<!-- plot_data <- rbind(plot_data, inflate.plot.ci("Obs., Bivariate Specif. Shown", subset(merged_STATA, type %in% c(0,4) & no_covar_presented > 0))) -->
<!-- plot_data <- rbind(plot_data, inflate.plot.ci("Obs., No Bivariate Specif. Shown", subset(merged_STATA, type %in% c(0,4) & no_covar_presented==0))) -->
<!-- plot_data <- rbind(plot_data, inflate.plot.ci("Obs., Fixed Effects", subset(merged_STATA, type %in% c(0,4) & fixed_effects.x>0))) -->
<!-- plot_data <- rbind(plot_data, inflate.plot.ci("Obs., No Fixed Effects", subset(merged_STATA, type %in% c(0,4) & fixed_effects.x==0))) -->
<!-- plot_data <- rbind(plot_data, inflate.plot.ci("Volume 57", subset(merged_STATA, vol==57))) -->
<!-- plot_data <- rbind(plot_data, inflate.plot.ci("Volume 58", subset(merged_STATA, vol==58))) -->
<!-- plot_data <- rbind(plot_data, inflate.plot.ci("Volume 59", subset(merged_STATA, vol==59))) -->
<!-- plot_data <- rbind(plot_data, inflate.plot.ci("American", subset(merged_STATA, subfield=="American"))) -->
<!-- plot_data <- rbind(plot_data, inflate.plot.ci("Comparative", subset(merged_STATA, subfield=="Comparative"))) -->
<!-- plot_data <- rbind(plot_data, inflate.plot.ci("IR", subset(merged_STATA, subfield=="IR"))) -->

<!-- plot_data <- as.data.frame(plot_data) -->
<!-- plot_data$mean <- as.numeric(as.character(plot_data$mean)) -->
<!-- plot_data$sd <- as.numeric(as.character(plot_data$sd)) -->
<!-- plot_data$ci <- as.numeric(as.character(plot_data$ci)) -->
<!-- plot_data$x <- nrow(plot_data):1 -->

<!-- ggplot(plot_data, aes(x=x, y=mean)) + geom_point(size=2) + geom_linerange(aes(ymin=mean - ci, ymax=mean + ci)) +  -->
<!--   labs(y="Average p-value change from coefficient change", x="", col=NULL) +  -->
<!--   theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + -->
<!--   geom_hline(yintercept=0, lty=2) + coord_flip() +  -->
<!--   scale_x_continuous(breaks=1:nrow(plot_data), labels=plot_data$name[nrow(plot_data):1]) -->
<!-- ``` -->




<!-- ```{r, echo=F, message=FALSE, warning=FALSE} -->
<!-- #by subfield -->
<!-- temp <- merged %>% filter(type==0|type==4) %>%  -->
<!--   mutate(cp = ifelse(no_covar_presented > 0, "Shown", "Bivariate Specification Not Shown")) -->
<!-- temp$flip <- ifelse(sign(temp$bv_b)!=sign(temp$mv_b), 1, 0) -->
<!-- #temp$bv_p[sign(temp$bv_b)!=sign(temp$mv_b)] <- 1 -->
<!-- temp$inflate_p <- ifelse(temp$flip==1, 1, temp$bv_p - temp$mv_p) -->
<!-- temp1 <- subset(temp, cp=="Shown") -->
<!-- temp1[order(temp1$inflate_p, decreasing=F),"x"]  <- 1:nrow(temp1) -->
<!-- temp2 <- subset(temp, cp=="Bivariate Specification Not Shown") -->
<!-- temp2[order(temp2$inflate_p, decreasing=F),"x"]  <- 1:nrow(temp2) -->
<!-- temp <- rbind(temp1 %>% select(x, bv_p, p_end=mv_p, cp, author, flip, subfield),  -->
<!--               temp2 %>% select(x, bv_p, p_end=mv_p, cp, author, flip, subfield)) -->
<!-- temp <- rbind(subset(temp, flip==0),  -->
<!--               subset(temp, flip==1) %>% mutate(x = c(28,30,32), p_end = 1),  -->
<!--               subset(temp, flip==1) %>% mutate(x = c(28.5,30.5,32.5), bv_p=1)) -->

<!-- ggplot(temp) + geom_segment(data=temp, aes(x=x,y=bv_p,yend=p_end,xend=x, col=subfield), inherit.aes=FALSE, arrow = arrow(length=unit(.05, 'inches'), type='closed')) + facet_grid(~ cp, scale="free_x", space="free_x") + theme_bw() + labs(x="", y="p-value") + geom_hline(yintercept=0.05, lty='dashed', col='grey') + theme(axis.ticks.x = element_blank(), axis.text.x = element_blank(), legend.title = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) + scale_shape(solid = FALSE) + scale_fill_discrete(guide = guide_legend(reverse=TRUE)) -->
<!-- ``` -->

\newpage
# Figure S2: P-value Decreases from Coefficient Change Only
In the plot below, we show the average change in p-values from the bivariate to the full specification (with 95% confidence intervals) for observational and experimental studies due only to coefficient change. The figure shows that the majority of the p-value change shown in the figures above are due to increases in the magnitude of the coefficient rather than a reduction in the standard errors. This finding is robust across different subsets of studies.

&nbsp;&nbsp;

```{r, echo=F, message=FALSE, warning=FALSE}
#amount of p-value change plot (by fixed effects, no fixed effects, volume, etc.)
merged_STATA <- inner_join(merged, STATA, by="author")
merged_STATA$fixed_effects.x[is.na(merged_STATA$fixed_effects.x)] <- 0
merged_STATA$flip <- ifelse(sign(merged_STATA$bv_b)!=sign(merged_STATA$mv_b), 1, 0) 
merged_STATA <- merged_STATA %>% mutate(bv_t = bv_b/bv_se, mv_t = mv_b/mv_se, t_mvb_bvse = mv_b/bv_se, 
                         t_bvb_mvse = bv_b/mv_se, p_mvb_bvse = pnorm(-abs(mv_b/bv_se))*2, 
                         p_mvb_bvse2 = ifelse(sign(mv_p-bv_p)!=sign(p_mvb_bvse-bv_p), bv_p,
                                              ifelse(bv_p<mv_p, ifelse(p_mvb_bvse>mv_p, mv_p, p_mvb_bvse),
                                                     ifelse(p_mvb_bvse<mv_p, mv_p, p_mvb_bvse))),
         p_bvb_mvse = pnorm(-abs(bv_b/mv_se))*2)

merged_STATA$inflate_p <- ifelse(merged_STATA$flip==1, (merged_STATA$p_mvb_bvse -1) - (1-merged_STATA$bv_p), merged_STATA$p_mvb_bvse- merged_STATA$bv_p )
merged_STATA$n_covariates <- stringr::str_count(merged_STATA$covariates, " ") + stringr::str_count(merged_STATA$fixed_effects.y, " ")
median_covar <- median(merged_STATA$n_covariates)

inflate.plot.ci <- function(name, dat){
  #tt <- t.test(dat$bv_p, dat$mv_p_flip, paired=TRUE)
  mean <- mean(dat$inflate_p)
  sd <- sd(dat$inflate_p)
  n <- nrow(dat)
  ci <- 1.9 * sd / sqrt(n)
  out <- c(paste0(name," (n=",n,")"), mean, sd, ci)
  names(out) <- c("name", "mean", "sd", "ci")
  return(out)
}
plot_data <- inflate.plot.ci("All", merged_STATA)
plot_data <- rbind(plot_data, inflate.plot.ci("Observational", subset(merged_STATA, type %in% c(0,4))))
plot_data <- rbind(plot_data, inflate.plot.ci("Experimental", subset(merged_STATA, type %in% c(1:3))))
plot_data <- rbind(plot_data, inflate.plot.ci("Obs., Bivariate Specif. Shown", subset(merged_STATA, type %in% c(0,4) & no_covar_presented > 0)))
plot_data <- rbind(plot_data, inflate.plot.ci("Obs., No Bivariate Specif. Shown", subset(merged_STATA, type %in% c(0,4) & no_covar_presented==0)))
plot_data <- rbind(plot_data, inflate.plot.ci("Obs., Fixed Effects", subset(merged_STATA, type %in% c(0,4) & fixed_effects.x>0)))
plot_data <- rbind(plot_data, inflate.plot.ci("Obs., No Fixed Effects", subset(merged_STATA, type %in% c(0,4) & fixed_effects.x==0)))
plot_data <- rbind(plot_data, inflate.plot.ci("Volume 57", subset(merged_STATA, vol==57)))
plot_data <- rbind(plot_data, inflate.plot.ci("Volume 58", subset(merged_STATA, vol==58)))
plot_data <- rbind(plot_data, inflate.plot.ci("Volume 59", subset(merged_STATA, vol==59)))
plot_data <- rbind(plot_data, inflate.plot.ci("American", subset(merged_STATA, subfield=="American")))
plot_data <- rbind(plot_data, inflate.plot.ci("Comparative", subset(merged_STATA, subfield=="Comparative")))
plot_data <- rbind(plot_data, inflate.plot.ci("IR", subset(merged_STATA, subfield=="IR")))
plot_data <- rbind(plot_data, inflate.plot.ci("Below Median Number of Controls", subset(merged_STATA, n_covariates < median_covar)))
plot_data <- rbind(plot_data, inflate.plot.ci("Above Median Number of Controls", subset(merged_STATA, n_covariates >= median_covar)))

plot_data <- as.data.frame(plot_data)
plot_data$mean <- as.numeric(as.character(plot_data$mean))
plot_data$sd <- as.numeric(as.character(plot_data$sd))
plot_data$ci <- as.numeric(as.character(plot_data$ci))
plot_data$x <- nrow(plot_data):1

ggplot(plot_data, aes(x=x, y=mean)) + geom_point(size=2) + geom_linerange(aes(ymin=mean - ci, ymax=mean + ci)) + 
  labs(y="Average p-value change from coefficient change", x="", col=NULL) + 
  theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  geom_hline(yintercept=0, lty=2) + coord_flip(ylim=c(-.6, .1)) + 
  scale_x_continuous(breaks=1:nrow(plot_data), labels=plot_data$name[nrow(plot_data):1])
```





\newpage
# Figure S3: Control Effects in Bivariate Versus Full Specifications
As we discuss in the paper, one potential objection to our argument is that researchers can't justify/explain suppression effects because of the complexity of the multivariate space. This figure shows otherwise. It reveals that controls generally have similar effects on the key coefficient estimate in the simple bivariate case and in the complex multivariate case. Each point represents a control in a regression model, with the x value representing the effect of removing that control from the full specification and the y value representing the effect of adding it to the bivariate specification. All effects are standardized to a mean of zero and standard deviation of one. The dotted line shows the 45 degree line. The figure shows a strong relationship between these two, implying that explanations about the effect of controls on key effect estimates often translate from the bivariate to the full specification.

```{r, echo=FALSE, message=FALSE, warning=FALSE, fig.cap=""}
ggplot(data=sm_stack) + geom_point(aes(x=dropped_diff_std, y=added_diff_std), alpha=.3) + theme_classic() + geom_abline(slope=-1, intercept=0, lty=2) + 
  labs(x="Standardized effect of removing control from full specification", y='Standardized effect of adding control to bivariate specification')
```

<!-- \newpage -->
<!-- # Figure S4: Changes in Coefficients and Standard Errors -->


<!-- This figure shows the percent changes in coefficients and in standard errors across the studies. It excludes one outlier from the no bivariate specification category because it had 300% coefficient growth.  -->

<!-- ```{r, echo=FALSE, message=FALSE, warning=FALSE} -->
<!-- temp <- merged %>%  -->
<!--   filter(type==0|type==4) %>%  # Select observational studies (type 0) and natural experiments (type 4) -->
<!--   mutate(cp = ifelse(no_covar_presented > 0, "Shown", "Bivariate Specification Not Shown"), -->
<!--          bv_t = bv_b/bv_se,  -->
<!--          mv_t = mv_b/mv_se,  -->
<!--          t_mvb_bvse = mv_b/bv_se,  -->
<!--          t_bvb_mvse = bv_b/mv_se, -->
<!--          p_mvb_bvse = pnorm(-abs(mv_b/bv_se))*2,  -->
<!--          p_bvb_mvse = pnorm(-abs(bv_b/mv_se))*2, -->
<!--          flip = ifelse(sign(bv_b)!=sign(mv_b), 1, 0), -->
<!--          inflate_p = ifelse(flip==1, 1, bv_p - mv_p)) -->

<!-- temp1 <- subset(temp, cp=="Shown") -->
<!-- temp1[order(temp1$inflate_p, decreasing=F),"x"]  <- 1:nrow(temp1) -->
<!-- temp2 <- subset(temp, cp=="Bivariate Specification Not Shown") -->
<!-- temp2[order(temp2$inflate_p, decreasing=F),"x"]  <- 1:nrow(temp2) -->
<!-- temp <- rbind(temp1 %>%  -->
<!--                 select(author, x, y=inflate, cp, flip) %>%  -->
<!--                 mutate(ylabel="coefficient inflation (log + 1)"),  -->
<!--               temp2 %>%  -->
<!--                 select(author, x, y=inflate, cp, flip) %>%  -->
<!--                 mutate(ylabel="coefficient inflation (log + 1)"), -->
<!--               temp1 %>%  -->
<!--                 select(author, x, y=inflate_se, cp, flip) %>%  -->
<!--                 mutate(ylabel="standard error inflation (log + 1)"), -->
<!--               temp2 %>%  -->
<!--                 select(author, x, y=inflate_se, cp, flip) %>%  -->
<!--                 mutate(ylabel="standard error inflation (log + 1)")) -->

<!-- ggplot(temp %>% filter(author!="Hultman")) + geom_point(aes(x=x, y=y), inherit.aes=FALSE) + facet_grid(ylabel ~ cp, scale="free", space="free_x") + theme_bw() + labs(x="", y="") + geom_hline(yintercept=0, lty='dashed', col='grey') + theme(axis.ticks.x = element_blank(), axis.text.x = element_blank(), legend.title = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.position = c(.25, .9), legend.text=element_text(size=7)) + scale_shape(solid = FALSE) + scale_color_manual(values=c("black", "grey")) + scale_fill_discrete(guide = guide_legend(reverse=TRUE)) -->
<!-- ``` -->
\newpage

# Figure S4: Changes in Coefficients across All Studies  
This figure is analogous to S1 but for coefficient changes instead of p-value changes. We calculate coefficient percent changes with log plus one.
&nbsp;&nbsp;

```{r, echo=F, message=FALSE, warning=FALSE}
#amount of p-value change plot (by fixed effects, no fixed effects, volume, etc.)

inflate.plot.ci <- function(name, dat){
  #tt <- t.test(dat$bv_p, dat$mv_p_flip, paired=TRUE)
  mean <- mean(dat$inflate)
  sd <- sd(dat$inflate)
  n <- nrow(dat)
  ci <- 1.9 * sd / sqrt(n)
  out <- c(paste0(name," (n=",n,")"), mean, sd, ci)
  names(out) <- c("name", "mean", "sd", "ci")
  return(out)
}
plot_data <- inflate.plot.ci("All", merged_STATA)
plot_data <- rbind(plot_data, inflate.plot.ci("Observational", subset(merged_STATA, type %in% c(0,4))))
plot_data <- rbind(plot_data, inflate.plot.ci("Experimental", subset(merged_STATA, type %in% c(1:3))))
plot_data <- rbind(plot_data, inflate.plot.ci("Obs., Bivariate Specif. Shown", subset(merged_STATA, type %in% c(0,4) & no_covar_presented > 0)))
plot_data <- rbind(plot_data, inflate.plot.ci("Obs., No Bivariate Specif. Shown", subset(merged_STATA, type %in% c(0,4) & no_covar_presented==0)))
plot_data <- rbind(plot_data, inflate.plot.ci("Obs., Fixed Effects", subset(merged_STATA, type %in% c(0,4) & fixed_effects.x>0)))
plot_data <- rbind(plot_data, inflate.plot.ci("Obs., No Fixed Effects", subset(merged_STATA, type %in% c(0,4) & fixed_effects.x==0)))
plot_data <- rbind(plot_data, inflate.plot.ci("Volume 57", subset(merged_STATA, vol==57)))
plot_data <- rbind(plot_data, inflate.plot.ci("Volume 58", subset(merged_STATA, vol==58)))
plot_data <- rbind(plot_data, inflate.plot.ci("Volume 59", subset(merged_STATA, vol==59)))
plot_data <- rbind(plot_data, inflate.plot.ci("American", subset(merged_STATA, subfield=="American")))
plot_data <- rbind(plot_data, inflate.plot.ci("Comparative", subset(merged_STATA, subfield=="Comparative")))
plot_data <- rbind(plot_data, inflate.plot.ci("IR", subset(merged_STATA, subfield=="IR")))
plot_data <- rbind(plot_data, inflate.plot.ci("Below Median Number of Controls", subset(merged_STATA, n_covariates < median_covar)))
plot_data <- rbind(plot_data, inflate.plot.ci("Above Median Number of Controls", subset(merged_STATA, n_covariates >= median_covar)))


plot_data <- as.data.frame(plot_data)
plot_data$mean <- as.numeric(as.character(plot_data$mean))
plot_data$sd <- as.numeric(as.character(plot_data$sd))
plot_data$ci <- as.numeric(as.character(plot_data$ci))
plot_data$x <- nrow(plot_data):1

ggplot(plot_data, aes(x=x, y=mean)) + geom_point(size=2) + geom_linerange(aes(ymin=mean - ci, ymax=mean + ci)) + 
  labs(y="Average coefficient change", x="", col=NULL) + 
  theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  geom_hline(yintercept=0, lty=2) + coord_flip() + 
  scale_x_continuous(breaks=1:nrow(plot_data), labels=plot_data$name[nrow(plot_data):1])
```

\newpage


# Figure S5: Changes in Coefficients that Do Not Show Bivariate
This figure is analogous to S2 but for coefficient changes instead of p-value changes. We calculate coefficient percentchanges with log plus one.
&nbsp;&nbsp;

```{r, echo=F, message=FALSE, warning=FALSE}
#amount of p-value change plot (by fixed effects, no fixed effects, volume, etc.)

inflate.plot.ci <- function(name, dat){
  #tt <- t.test(dat$bv_p, dat$mv_p_flip, paired=TRUE)
  mean <- mean(dat$inflate)
  sd <- sd(dat$inflate)
  n <- nrow(dat)
  ci <- 1.9 * sd / sqrt(n)
  out <- c(paste0(name," (n=",n,")"), mean, sd, ci)
  names(out) <- c("name", "mean", "sd", "ci")
  return(out)
}
merged_STATA <- merged_STATA %>% filter(no_covar_presented==0)
plot_data <- inflate.plot.ci("All", merged_STATA)
plot_data <- rbind(plot_data, inflate.plot.ci("Observational", subset(merged_STATA, type %in% c(0,4))))
plot_data <- rbind(plot_data, inflate.plot.ci("Experimental", subset(merged_STATA, type %in% c(1:3))))
plot_data <- rbind(plot_data, inflate.plot.ci("Obs., Fixed Effects", subset(merged_STATA, type %in% c(0,4) & fixed_effects.x>0)))
plot_data <- rbind(plot_data, inflate.plot.ci("Obs., No Fixed Effects", subset(merged_STATA, type %in% c(0,4) & fixed_effects.x==0)))
plot_data <- rbind(plot_data, inflate.plot.ci("Volume 57", subset(merged_STATA, vol==57)))
plot_data <- rbind(plot_data, inflate.plot.ci("Volume 58", subset(merged_STATA, vol==58)))
plot_data <- rbind(plot_data, inflate.plot.ci("Volume 59", subset(merged_STATA, vol==59)))
plot_data <- rbind(plot_data, inflate.plot.ci("American", subset(merged_STATA, subfield=="American")))
plot_data <- rbind(plot_data, inflate.plot.ci("Comparative", subset(merged_STATA, subfield=="Comparative")))
plot_data <- rbind(plot_data, inflate.plot.ci("IR", subset(merged_STATA, subfield=="IR")))
plot_data <- rbind(plot_data, inflate.plot.ci("Below Median Number of Controls", subset(merged_STATA, n_covariates < median_covar)))
plot_data <- rbind(plot_data, inflate.plot.ci("Above Median Number of Controls", subset(merged_STATA, n_covariates >= median_covar)))

plot_data <- as.data.frame(plot_data)
plot_data$mean <- as.numeric(as.character(plot_data$mean))
plot_data$sd <- as.numeric(as.character(plot_data$sd))
plot_data$ci <- as.numeric(as.character(plot_data$ci))
plot_data$x <- nrow(plot_data):1

ggplot(plot_data, aes(x=x, y=mean)) + geom_point(size=2) + geom_linerange(aes(ymin=mean - ci, ymax=mean + ci)) + 
  labs(y="Average coefficient change", x="", col=NULL) + 
  theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  geom_hline(yintercept=0, lty=2) + coord_flip() + 
  scale_x_continuous(breaks=1:nrow(plot_data), labels=plot_data$name[nrow(plot_data):1])
```

\newpage
# Figure S6: T-Value Changes in Observational Studies

```{r, echo=FALSE, message=FALSE, warning=FALSE}
temp <- merged %>% filter(type==0|type==4) %>% 
  mutate(cp = ifelse(no_covar_presented > 0, "Shown", "Bivariate Specification Not Shown"))
temp$bv_t <- abs(temp$bv_b/temp$bv_se)
temp$mv_t <- abs(temp$mv_b/temp$mv_se)
temp$bv_t <- ifelse(sign(temp$bv_b)==sign(temp$mv_b), temp$bv_t, temp$bv_t * -1)

temp$inflate_t <- temp$bv_t - temp$mv_t
temp$inflate_t <- ifelse(sign(temp$bv_t)==sign(temp$mv_t), temp$inflate_t, -1000)
temp1 <- subset(temp, cp=="Shown")
temp1[order(temp1$bv_t, decreasing=T),"x"]  <- 1:nrow(temp1)
temp2 <- subset(temp, cp=="Bivariate Specification Not Shown")
temp2[order(temp2$bv_t, decreasing=T),"x"]  <- 1:nrow(temp2)
temp <- rbind(temp1 %>% select(x, bv_t, p_end=mv_t, cp), 
              temp2 %>% select(x, bv_t, p_end=mv_t, cp))

ggplot(temp) + geom_segment(data=temp, aes(x=x,y=bv_t,yend=p_end,xend=x, col=ifelse(sign(p_end)==sign(bv_t), 'black', 'red')), inherit.aes=FALSE, arrow = arrow(length=unit(.05, 'inches'), type='closed')) + facet_grid(~ cp, scale="free_x", space="free_x") + theme_bw() + labs(x="", y="t statistic") + geom_hline(yintercept=0, lty='dashed', col='black') + geom_hline(yintercept=1.96, lty='dashed', col='grey') + geom_hline(yintercept=-1.96, lty='dashed', col='grey') + theme(axis.ticks.x = element_blank(), axis.text.x = element_blank(), legend.title = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), legend.position = "none") + scale_color_manual(values=c("black", "red")) + scale_shape(solid = FALSE) + scale_fill_discrete(guide = guide_legend(reverse=TRUE)) + ylim(-4,14)
```

\newpage
# Figure S7a: Confidence Intervals of Bivariate and Full Specifications 

```{r, echo=FALSE, message=FALSE, warning=FALSE}
ggplot(data = full_check %>%
         mutate(p_change = ifelse(bv_p>=.05 & mv_p<.05, 'p < 0.05 depends on covariates', 'significant without covariates'))) + 
  geom_vline(xintercept=0, alpha=.5, lty=2) + 
  geom_hline(yintercept=0, alpha=.5, lty=2) + 
  geom_abline(slope=1, alpha=.5, lty=2) + 
  geom_segment(aes(x=bv_b, xend=bv_b, y=mv_b - mv_se*1.96, yend=mv_b + mv_se*1.96, col=p_change)) +
  geom_segment(aes(x=bv_b - mv_se*1.96, xend=bv_b  + mv_se*1.96, y=mv_b, yend=mv_b, col=p_change)) +
  labs(x = 'Bivariate Specification 95% Confidence Interval', y = 'Full Specification 95% Confidence Interval') + 
  theme_classic() + 
  scale_color_manual(values=c('black', 'grey')) + 
  theme(legend.position='bottom', legend.title=element_blank())
```

\newpage

# Figure S7b: Confidence Intervals of Bivariate and Full Specifications (Normed Absolute Value Coefficients)

Negative vales are from cases that switch signs. All variables recoded to 0-1.

```{r, echo=FALSE, message=FALSE, warning=FALSE}
ggplot(data = full_check %>%
         mutate(p_change = ifelse(bv_p>=.05 & mv_p<.05, 'p < 0.05 depends on covariates', 'significant without covariates'),
                sign_b = sign(bvn_b),
                sign_m = sign(mvn_b)))+ 
  geom_vline(xintercept=0, alpha=.5, lty=2) + 
  geom_hline(yintercept=0, alpha=.5, lty=2) + 
  geom_abline(slope=1, alpha=.5, lty=2) + 
  geom_segment(aes(x=bvn_b * sign_b, xend=bvn_b*sign_b, y=(mvn_b - mvn_se*1.96)*sign_b, yend=(mvn_b + mvn_se*1.96)*sign_b, col=p_change)) +
  geom_segment(aes(x=(bvn_b - bvn_se*1.96)*sign_b, xend=(bvn_b  + bvn_se*1.96)*sign_b, y=mvn_b*sign_b, yend=mvn_b*sign_b, col=p_change)) +
  labs(x = 'Bivariate Specification 95% Confidence Interval', y = 'Full Specification 95% Confidence Interval') + 
  theme_classic() + 
  xlim(-3, 10) + 
  ylim(-3, 10) + 
  scale_color_manual(values=c('black', 'grey')) + 
  theme(legend.position='bottom', legend.title=element_blank())
```

\newpage
# Figure S7c: Confidence Intervals of Bivariate and Full Specifications (Absolute Value Beta Coefficients)

All variables standardized so that their variances are 1. Negative vales are from cases that switch signs. 

```{r, echo=FALSE, message=FALSE, warning=FALSE}
ggplot(data = full_check %>%
         mutate(p_change = ifelse(bv_p>=.05 & mv_p<.05, 'p < 0.05 depends on covariates', 'significant without covariates'),
                sign_b = sign(bvn_b),
                bv_beta_se = bv_beta/bv_b*bv_se,
                mv_beta_se = mv_beta/mv_b*mv_se))+ 
  geom_vline(xintercept=0, alpha=.5, lty=2) + 
  geom_hline(yintercept=0, alpha=.5, lty=2) + 
  geom_abline(slope=1, alpha=.5, lty=2) + 
  geom_segment(aes(x=bv_beta * sign_b, xend=bv_beta*sign_b, y=(mv_beta - mv_beta_se*1.96)*sign_b, yend=(mv_beta + mv_beta_se*1.96)*sign_b, col=p_change)) +
  geom_segment(aes(x=(bv_beta - bv_beta_se*1.96)*sign_b, xend=(bv_beta  + bv_beta_se*1.96)*sign_b, y=mv_beta*sign_b, yend=mv_beta*sign_b, col=p_change)) +
  labs(x = 'Bivariate Specification 95% Confidence Interval', y = 'Full Specification 95% Confidence Interval') + 
  theme_classic() + 
  xlim(-1, 9) + 
  ylim(-1, 9) + 
  scale_color_manual(values=c('black', 'grey')) + 
  theme(legend.position='bottom', legend.title=element_blank())
```

\newpage
# Figure S7d: Confidence Intervals of Bivariate and Full Specifications (Absolute Value Beta Coefficients--Zoomed)

All variables standardized so that their variances are 1. Negative vales are from cases that switch signs. 

```{r, echo=FALSE, message=FALSE, warning=FALSE}
ggplot(data = full_check %>%
         mutate(p_change = ifelse(bv_p>=.05 & mv_p<.05, 'p < 0.05 depends on covariates', 'significant without covariates'),
                sign_b = sign(bvn_b),
                bv_beta_se = bv_beta/bv_b*bv_se,
                mv_beta_se = mv_beta/mv_b*mv_se))+ 
  geom_vline(xintercept=0, alpha=.5, lty=2) + 
  geom_hline(yintercept=0, alpha=.5, lty=2) + 
  geom_abline(slope=1, alpha=.5, lty=2) + 
  geom_segment(aes(x=bv_beta * sign_b, xend=bv_beta*sign_b, y=(mv_beta - mv_beta_se*1.96)*sign_b, yend=(mv_beta + mv_beta_se*1.96)*sign_b, col=p_change)) +
  geom_segment(aes(x=(bv_beta - bv_beta_se*1.96)*sign_b, xend=(bv_beta  + bv_beta_se*1.96)*sign_b, y=mv_beta*sign_b, yend=mv_beta*sign_b, col=p_change)) +
  labs(x = 'Bivariate Specification 95% Confidence Interval', y = 'Full Specification 95% Confidence Interval') + 
  theme_classic() + 
  coord_cartesian(xlim=c(0,1), ylim=c(0,1)) +
  scale_color_manual(values=c('black', 'grey')) + 
  theme(legend.position='bottom', legend.title=element_blank())
```

\newpage
# Figure S8: Multicollinearity in Observational Studies

The figure below shows the correlation between the key variable of interest and each control variable for every observational study. This figure follows the same design as Figures 2 and 3, which present our main findings, including the exact same ordering of studies. As we report in the paper, we find suppression effects helping authors achieve statistical significance on the far right side of this figure. If collinearity was contributing, we would expect to see higher correlations on the far right of this figure. As the figure shows, however, we see no such pattern. 

```{r, echo=FALSE, message=FALSE, warning=FALSE}
ggplot(data=cov_corr %>% 
         filter(keyvar==1) %>%
         left_join(info %>% rename(author=first_author)) %>%
         mutate(disclosure = ifelse(no_covar_presented==0|is.na(no_covar_presented), "Not Shown", "Bivariate Specification Shown"))) + 
  geom_point(aes(x=author, y =corr), alpha=.2) + 
  theme_classic() + 
  facet_grid(~disclosure, scale="free_x", space="free_x") + 
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank()) + 
  labs(x='', y='Correlation Between Key Variable\nand Controls in Full Specification')
```


\newpage
# Specification Uncertainty
We conducted analyses to assess specification uncertainty from control choice, but concluded that these analyses were not especially informative. Here we present two of these analyses.

## Statistically Significant in All Specifications
In the figure below, we present for each article the percent of all possible control combinations in which the key effect estimate was statistically significant (at the 0.05 level). Each dot represents one study and the studies are sorted by percent significant. The figure reveals that about a third of the studies were significant across all specifications. The other two thirds vary considerably with some being rarely statistically significant or never statistically significant. In studies with a large number of controls, we randomly selected control combinations.

&nbsp;

```{r, echo=F, message=FALSE, warning=FALSE}
stacked_estimates2 <- inner_join(stacked_estimates, bv_mv)


# calculating bounds (min and max of spec curve) and percentage significant
leamer_bounds <- stacked_estimates2 %>%
  group_by(author) %>%
  summarise(leamer_min=min(estimate, na.rm=T), 
            leamer_max=max(estimate, na.rm=T), percent_sig = mean(pval<.05, na.rm=T)*100,
            percent_sign = mean(sign(estimate)==sign(mv_b), na.rm=T) *100,
            mv_percentile = sum(abs(mv_b) > ifelse(sign(estimate)==sign(mv_b), abs(estimate), abs(estimate)*-1)) / length(author) * 100,
            bv_percentile = sum(ifelse(sign(bv_b)==sign(mv_b), abs(bv_b), abs(bv_b)*-1) > ifelse(sign(estimate)==sign(mv_b), abs(estimate), abs(estimate)*-1)) / length(author)* 100)

leamer_bounds$min_max_sign <- (sign(leamer_bounds$leamer_min)==sign(leamer_bounds$leamer_max))

# calculating mean and SD from specification curve
athey_imbens_sd <- as.data.frame(unique(stacked_estimates$author))
colnames(athey_imbens_sd) <- "author"
athey_imbens_sd$ai_sd <- NA

for(i in 1:nrow(athey_imbens_sd)){
  temp_author <- as.character(athey_imbens_sd$author[i])
  
  athey_imbens_sd$ai_sdmv[i] <- sum((subset(stacked_estimates, author==temp_author)$estimate - subset(bv_mv, author==temp_author)$mv_b)^2, na.rm=T) / sum(!is.na(subset(stacked_estimates, author==temp_author)$estimate))
  athey_imbens_sd$ai_sd[i] <- sd(subset(stacked_estimates, author==temp_author)$estimate, na.rm=T)
  athey_imbens_sd$ai_mean[i] <- mean(subset(stacked_estimates, author==temp_author)$estimate, na.rm=T)
  athey_imbens_sd$ai_med[i] <- median(subset(stacked_estimates, author==temp_author)$estimate, na.rm=T)
}

# merging into full data
plot_data <- left_join(full_check, leamer_bounds, by="author")
plot_data <- left_join(plot_data, athey_imbens_sd, by="author")

plot_data <- plot_data %>% mutate(mv_bp = ifelse(mv_b<0, mv_b*-1,mv_b), #recode so that multivariate is always positive and 
                 bv_bp = ifelse(mv_b<0, bv_b*-1,bv_b), # if Multivariate is negative, Flip sign Of bivariate
                 med_bp = ifelse(mv_b<0, ai_med*-1, ai_med), # if Multivariate is negative, Flip sign Of median
                 mv_bp = ifelse(bv_bp<0, mv_bp+bv_bp, mv_bp), #If bivariate and multivariate change signs, which only happens if Bivariate is less than zero
                 bv_bp = ifelse(bv_bp<0, bv_bp+bv_bp, bv_bp), mv_bp = ifelse(med_bp<0, mv_bp+med_bp, mv_bp), med_bp = ifelse(med_bp<0, med_bp+med_bp, med_bp),
                 inflate = (log(mv_bp+1) - log(bv_bp+1))*100,
                 inflate_nolog = ifelse(sign(mv_b)==sign(bv_b), (abs(mv_b) - abs(bv_b))/abs(bv_b), (abs(mv_b) + abs(bv_b))/abs(bv_b)) * 100,
                 inflate_curve_mean = ifelse(sign(mv_b)==sign(ai_mean), (abs(mv_b) - abs(ai_mean))/abs(ai_mean), (abs(mv_b) + abs(ai_mean))/abs(ai_mean)) * 100,
                 inflate_curve_median = ifelse(sign(mv_b)==sign(ai_med), (abs(mv_b) - abs(ai_med))/abs(ai_med), (abs(mv_b) + abs(ai_med))/abs(ai_med)) * 100,
                 inflate_curve_median_log = (log(mv_bp+1) - log(med_bp+1))*100)

# percent of combinations significant
plot_data$sd_diff_mean <- abs(plot_data$mv_b - plot_data$ai_mean)/plot_data$ai_sd
plot(sort(plot_data$percent_sig), pch=19, frame=F, cex=.5, xaxt='n', xlab="", 
     ylab="Percent of Significant Alternative Specifications", 
     main="")
```

\newpage
## Standard Deviation of Estimates
In the figure below, we present the standard deviation of the key variable's coefficient estimate across all possible control combinations. To scale them, we present a ratio: this standard deviation over the standard error of the key estimate (from the full specification). This ratio captures the degree to which specification uncertainty (from control choice) is larger or smaller than uncertainty reported by the articles' standard errors on the key estimates. Ratios of one, which are noted by the dotted line in the figure, show articles where the standard deviation across all possible estimates is about the same size as the standard error. If one believes that this standard deviation captures specification uncertainty, then ratios of one also imply that findings have about twice the uncertainty they report in their standard errors (see the previous page for why you should probably not draw this conclusion). The figure shows several articles that have truly massive amounts of uncertainty relative to the standard errors they report (as much as ninefold), but more than half have substantially less uncertainty from control choice than reflected in their standard errors.

&nbsp;

```{r, echo=F, message=FALSE, warning=FALSE}
plot_data$ratio <- plot_data$ai_sd / plot_data$mv_se
plot(sort(plot_data$ratio), frame=F, pch=19, cex=.5, xlab="", ylab="Standard Deviation Ratio", main="", xaxt='n')
abline(h=1, lty=2)
```


\newpage

# Analyses Specified in Pre-Analysis Plan 

## Inflating with Controls
### Plots of Key Coefficient under Full and Bivariate Specifications
The first of set of statistics we will estimate is straightforward. How much does the inclusion of the controls used in researchers' main model change the main finding, compared to a model without controls? We will examine both the effect size estimate and the p-value. Often, researchers never report the estimate without controls (and we will code how often they do). This comparison will give us an overall sense of how often controls inflate estimates.

Since we did not know what the variation of the coefficients would look like, we prespecified three plots comparing the bivariate (often bivariate) and multivariate coefficients for each paper

- The raw coefficients
- standardized beta coefficients, and
- the coefficients after rescaling variables to vary between zero and one.

The next three plots show those results. Unfortunately, the huge range and scales makes them almost impossible to interpret.
\newpage

### Key Estimate Under Full and Bivariate Specifications

```{r, echo=FALSE, message=FALSE, warning=FALSE,,fig.asp = 1}
plot(x=merged$bv_b, y=merged$mv_b, 
     xlab="Bivariate Specification Coefficient", ylab="Full Specification Coefficient",
     pch=16, cex=.5, frame=F); abline(a=0, b=1, lty=2, col='grey'); axis(side=2)
```
\newpage

### Key Estimate Under Full and Bivariate Specifications (Beta Coefficients)

```{r, echo=FALSE, message=FALSE, warning=FALSE,fig.asp = 1}
 plot(x=merged$bv_beta, y=merged$mv_beta, 
      xlab="Bivariate Specification Beta Coefficient", ylab="Full Specification Beta Coefficient",
      pch=16, cex=.5, frame=F); abline(a=0, b=1, lty=2, col='grey'); axis(side=2)
```
\newpage

### Key Estimate Under Full and Bivariate Specifications (Normed Coefficients)

```{r, echo=FALSE, message=FALSE, warning=FALSE,fig.asp = 1}
 plot(x=merged$bvn_b, y=merged$mvn_b, 
      xlab="Bivariate Specification Beta Coefficient", ylab="Full Specification Beta Coefficient",
      pch=16, cex=.5, frame=F); abline(a=0, b=1, lty=2, col='grey'); axis(side=2)
```
\newpage

### Descriptive Statistics on Inflation

- Percent of studies where the coefficient of the key estimate under full specification inflates over the bivariate specification: `r round(mean(bv_mv$bv_bp<bv_mv$mv_bp) * 100,0)`% (`r sum(bv_mv$bv_bp<bv_mv$mv_bp)` of `r nrow(bv_mv)`). Note that this includes all studies. When limited to studies where a bivariate specification is not shown, the percentage is `r round(mean(merged[merged$no_covar_presented==0, 'bv_bp']<merged[merged$no_covar_presented==0, 'mv_bp']) * 100,0)`%. When further limited to observational studies that do not show a bivariate specification, the percentage is `r round(mean(merged[merged$no_covar_presented==0 & merged$type %in% c(0,4), 'bv_bp']<merged[merged$no_covar_presented==0 & merged$type %in% c(0,4), 'mv_bp']) * 100,0) `%. When further limited to studies whose bivariate specification had a p-value greater than 0.05, the percentage is `r round(mean(merged[merged$no_covar_presented==0 & merged$type %in% c(0,4) & merged$bv_p>.05, 'bv_bp']<merged[merged$no_covar_presented==0 & merged$type %in% c(0,4) & merged$bv_p>.05, 'mv_bp']) * 100,0)`%. 

- Percent of studies where p-value of key estimate does not achieve statistical significance under bivariate specification, but does in full specification: `r round(with(bv_mv,sum(bv_p>=.05 & mv_p<.05))/nrow(bv_mv) * 100, 0)`% (`r with(bv_mv,sum(bv_p>=.05 & mv_p<.05))` of `r nrow(bv_mv)`). `r round(sum(bv_mv$bv_p<.05)/nrow(bv_mv) * 100,0)`% of bivariate specification key estimates achieve statistical significance, `r round(sum(bv_mv$mv_p<.05)/nrow(bv_mv) * 100,0)`% of full specification key estimates achieve statistical significance at p<0.05).

- Percent of models where the sign on the key estimate is different in the full and bivariate specifications `r round(with(bv_mv, sum(sign(bv_b)!=sign(mv_b)))/nrow(bv_mv) * 100,0)`% (`r round(with(bv_mv, sum(sign(bv_b)!=sign(mv_b) & mv_p<.05 & bv_p<.05))/nrow(bv_mv) * 100,0)`% where both estimates are statistically significant).

## Sensitivity to Control Choice
```{r, echo=FALSE, message=FALSE, warning=FALSE}

stacked_estimates2 <- inner_join(stacked_estimates, bv_mv)

# calculating bounds (min and max of spec curve) and percentage significant
leamer_bounds <- stacked_estimates2 %>%
  group_by(author) %>%
  summarise(leamer_min=min(estimate, na.rm=T), 
            leamer_max=max(estimate, na.rm=T), percent_sig = mean(pval<.05, na.rm=T)*100,
            percent_sign = mean(sign(estimate)==sign(mv_b), na.rm=T) *100,
            percent_sig_sign = mean(pval<.05 & sign(estimate)==sign(mv_b), na.rm=T)*100,
            mv_percentile = sum(abs(mv_b) > ifelse(sign(estimate)==sign(mv_b), abs(estimate), abs(estimate)*-1)) / length(author) * 100,
            mv_percentile_sig = sum(abs(mv_b) > ifelse(sign(estimate)==sign(mv_b), abs(estimate), abs(estimate)*-1) & pval<0.05) / length(author) * 100,
            bv_percentile = sum(ifelse(sign(bv_b)==sign(mv_b), abs(bv_b), abs(bv_b)*-1) > ifelse(sign(estimate)==sign(mv_b), abs(estimate), abs(estimate)*-1)) / length(author)* 100)
leamer_bounds$min_max_sign <- (sign(leamer_bounds$leamer_min)==sign(leamer_bounds$leamer_max))

# calculating mean and SD from specification curve
athey_imbens_sd <- as.data.frame(unique(stacked_estimates$author))
colnames(athey_imbens_sd) <- "author"
athey_imbens_sd$ai_sd <- NA

for(i in 1:nrow(athey_imbens_sd)){
  temp_author <- as.character(athey_imbens_sd$author[i])
  
  athey_imbens_sd$ai_sdmv[i] <- sum((subset(stacked_estimates, author==temp_author)$estimate - subset(bv_mv, author==temp_author)$mv_b)^2, na.rm=T) / sum(!is.na(subset(stacked_estimates, author==temp_author)$estimate))
  athey_imbens_sd$ai_sd[i] <- sd(subset(stacked_estimates, author==temp_author)$estimate, na.rm=T)
  athey_imbens_sd$ai_mean[i] <- mean(subset(stacked_estimates, author==temp_author)$estimate, na.rm=T)
  athey_imbens_sd$ai_med[i] <- median(subset(stacked_estimates, author==temp_author)$estimate, na.rm=T)
}

# merging into full data
plot_data <- left_join(full_check, leamer_bounds, by="author")
plot_data <- left_join(plot_data, athey_imbens_sd, by="author")
```

- `r round(mean(leamer_bounds$percent_sign),0)`% of alternative specifications have a key effect coefficient of the same sign as the full specification.
- `r round(mean(leamer_bounds$percent_sig_sign),0)`% of alternative specifications have a key effect coefficient of the same sign as the full specification that is statistically significant.
- `r round(mean(leamer_bounds$mv_percentile),0)`% of alternative specifications have a smaller magnitude key effect coefficient size than the full specification.
- `r round(mean(leamer_bounds$mv_percentile_sig),0)`% of alternative specifications have a smaller magnitude key effect coefficient size than the full specification that is statistically significant.
- `r round(mean(abs(leamer_bounds$bv_percentile - leamer_bounds$mv_percentile)),0)`% of alternative specifications have a key effect coefficient size between full and bivariate specifications.
- `r round(mean(leamer_bounds$min_max_sign)*100, 0)`% of studies have a minimum and maximum possible key effect of the same sign.
- The average standard deviation of estimates around the full specification across studies is `r round(mean(plot_data$ai_sdmv, na.rm=T),3)`.

## Deflators and Inflators

```{r, echo=F, message=FALSE, warning=FALSE}
inf_reg_summary <- inflator_reg %>% 
  group_by(author) %>%
  summarise(pos = sum(sign(b)==1),
            n=n())
```
- Percent of controls that inflate the estimate of the key variable on average; unweighted: `r round(sum(inflator_reg$b>0)/nrow(inflator_reg) * 100, 0)`%, weighted by study: `r round(mean(inf_reg_summary$pos/inf_reg_summary$n, na.rm=T) * 100,0)`%.

- Percent of controls that statistically significantly inflate the estimate of the key variable on average; unweighted: `r round(with(inflator_reg, sum(b>0 & p<.05)/nrow(inflator_reg))* 100,0) `%.

&nbsp;

```{r, echo=F, message=FALSE, warning=FALSE}
inflator_reg2 <- inflator_reg %>%
  full_join(bv_mv %>% select(author, mv_b))
inflator_reg2$b_inf <- ifelse(inflator_reg2$mv_b>0, inflator_reg2$b, inflator_reg2$b*-1)
inflator_reg2$beta_inf <- ifelse(inflator_reg2$mv_b>0, inflator_reg2$beta*1, inflator_reg2$beta*-1)
inflator_reg2 <- inflator_reg2[!is.na(inflator_reg2$mv_b),]

hist(inflator_reg2$beta_inf, main="Distribution of Inflating and Deflating Controls", xlab="Standardized Effect of Control on Key Variable Coefficient", breaks=30)
abline(v=0, lwd=2, col="red")
legend("topleft",legend=c(paste0(round(sum(inflator_reg$b>0)/nrow(inflator_reg) * 100, 0), "% of Controls Inflate"), "the Result on Average"), bty="n")
```

# Articles' Text about Controls


```{r, echo=F, message=FALSE, warning=FALSE}

c<-info$text_covariates[(info$type==0 |info$type==4)& info$status=="Complete"&info$no_covar_presented==0]
```

This section presents the text about controls from observational articles that did not present a bivariate specification.

1

`r c[1]`

2

`r c[2]`

3

`r c[3]`

4

`r c[4]`

5

`r c[5]`

6

`r c[6]`

7

`r c[7]`

8

`r c[8]`

9

`r c[9]`

10

`r c[10]`

11

`r c[11]`

12

`r c[12]`

13

`r c[13]`

14

`r c[14]`

15

`r c[15]`

16

`r c[16]`

17

`r c[17]`

18

`r c[18]`

19

`r c[19]`

20

`r c[20]`

21

`r c[21]`

22

`r c[22]`

23

`r c[23]`

24

`r c[24]`

25

`r c[25]`

26

`r c[26]`

27

`r c[27]`

28

`r c[28]`

29

`r c[29]`

30

`r c[30]`

31

`r c[31]`

32

`r c[32]`



